* Start from an empty session.
clear all

* Root folder of the replication package; the paths below are built from it.
global root "/Replication file"

* Load the full-count Census data here, before the merge.
* It must include the following variables:
*   region stateicp statefip urban ownershp nchild nchlt5 eldch sex age marst
*   race hispan nativity school lit labforce occ occ1950 ind1950 occscore erscor50

* Attach the state law variables, matching on statefip and year.
* The _merge indicator is not kept.
merge m:1 statefip year using "$root/Data/state_laws.dta", nogenerate



* ---------------------------------------------------------------
* Cleaning the dataset
* ---------------------------------------------------------------

* Keep only records with gq==1. Per the original note, this drops people in
* group quarter institutions or in other group quarters.
keep if gq==1

* Convert the string variable state into a labelled numeric variable of the
* same name; the original strings stay available as STATE.
rename state STATE
encode STATE, generate(state)

* Keep people strictly older than 16 and strictly younger than 65.
* A missing age fails the age<65 test and is dropped.
keep if age>16 & age<65

*** Five extra state-year observations that should be dropped (frequency 1).
*** state holds the numeric codes of the encoded state variable; per the
*** original notes: 49 = Wyoming, 25 = Montana, 11 = Idaho, 35 = Oklahoma.

* 1860: Wyoming, Montana and Idaho
drop if year==1860 & inlist(state, 49, 25, 11)

* 1870 and 1880: Oklahoma
drop if state==35 & inlist(year, 1870, 1880)



* labforce is missing from the 1900 full count, so it is generated from
* occ1950 using info from IPUMS. For 1900 records only:
*   occ1950 below 971, or equal to 979  -> labforce = 2
*   anything else (including a missing occ1950) -> labforce = 1
replace labforce = cond(occ1950<971 | occ1950==979, 2, 1) if year==1900

* employed: 1 when labforce==2, 0 when labforce==1, missing for any other value.
generate employed = (labforce==2) if inlist(labforce, 1, 2)



* Marital-status indicators built from marst.

* married: 0 when marst==6, 1 for every other NON-MISSING marst code.
* A bare "marst~=6" is also true when marst is missing (a missing value is not
* equal to 6), which would code records with unknown marital status as
* married. The !missing() guard leaves them missing instead.
gen married = (marst != 6) if !missing(marst)
* married is not defined for the 1860 and 1870 census years.
replace married = . if year==1860 | year==1870

* married_1: excluding divorced and widowed.
* 1 when marst is 1 or 2, 0 when marst==6, missing for every other code.
gen married_1 = 1 if marst==1 | marst==2
replace married_1 = 0 if marst==6

* divorced: 1 when marst==4, 0 when marst is 1 or 2, missing otherwise.
* The full variable name marst is spelled out: the abbreviation "mars" only
* resolves while variable abbreviation is switched on and no other variable
* starts with the same letters.
gen divorced = 1 if marst==4
replace divorced = 0 if marst==1 | marst==2

* white: 1 when race==1, 0 for every other value (a missing race also gives 0).
generate white = (race==1)

* hispanic: 0 when hispan==0, 1 for every other value (a missing hispan also gives 1).
generate hispanic = (hispan!=0)

* native: 1 for nativity codes 1 to 4, 0 for code 5, missing for anything else.
generate native = inlist(nativity, 1, 2, 3, 4) if inlist(nativity, 1, 2, 3, 4, 5)


* Creating industries: 0/1 dummies derived from ind1950.
* A missing ind1950 yields 0 in each of the five group dummies.
generate mining              = inlist(ind1950, 206, 216, 226, 236, 239)
generate services            = inlist(ind1950, 826, 836, 846, 847, 848, 849)
generate mercantile          = (ind1950>600 & ind1950<700)
generate manufac_durable     = (ind1950>300 & ind1950<400)
generate manufac_non_durable = (ind1950>400 & ind1950<500)

* other_industry: 1 when none of the five groups above applies, 0 otherwise.
generate other_industry = !(mining | services | mercantile | manufac_durable | manufac_non_durable)

* String label for the industry group. The code ranges above do not overlap,
* so at most one of the dummies is 1 for any record.
generate industry = "mining" if mining==1
foreach sector in services manufac_durable manufac_non_durable mercantile {
    replace industry = "`sector'" if `sector'==1
}
replace industry = "other" if industry==""




* Defining female- and male-dominated industries.
* Cells are state x year x ind1950.

* Employed people with industry code 0 are removed before cells are built.
drop if ind1950==000 & employed==1

* Number of employed people in each cell.
* total() is the documented egen function; sum() is its legacy name.
bys state year ind1950: egen total_employed = total(employed)

* Person-level flags (1 or missing) for employed women (sex==2) and men (sex==1).
gen employed_women = 1 if employed==1 & sex==2
gen employed_men   = 1 if employed==1 & sex==1

* Number of employed men and employed women in each cell.
bys state year ind1950: egen total_employed_men   = total(employed_men)
bys state year ind1950: egen total_employed_women = total(employed_women)

* Gender shares of employment in the cell; missing when nobody in the cell
* is employed (division by zero).
gen share_women_industry = total_employed_women/total_employed
gen share_men_industry   = total_employed_men/total_employed

* Female dominated: women are at least 66 percent of the cell's employment.
* Stata ranks a missing value above every number, so ">= 0.66" alone is true
* for cells whose share is missing; the !missing() guard stops cells with no
* employment from being flagged as female dominated.
gen female_dominated = 1 if share_women_industry>=0.66 & !missing(share_women_industry)
* Male dominated: women are less than 34 percent of the cell's employment.
* A missing share never satisfies "<", so no guard is needed here.
gen male_dominated = 1 if share_women_industry<0.34

* Outcome variables: 1 = employed in a female- (male-) dominated cell,
* 0 = not employed. People employed in any other kind of cell stay missing.
gen employed_female_dominated = 1 if employed==1 & female_dominated==1
replace employed_female_dominated = 0 if employed==0

gen employed_male_dominated = 1 if employed==1 & male_dominated==1
replace employed_male_dominated = 0 if employed==0




* Law passage (states): each control is 1 when the state's law flag equals 1
* and the law year is strictly earlier than the census year, 0 otherwise.
generate seating_law_control     = (seating_law==1     & year_seating_law<year)
generate prohibitory_law_control = (prohibitory_law==1 & year_prohibitory_law<year)
generate night_work_law_control  = (night_work_laws==1 & year_night_work<year)

* Intensity measures: exposure to each law, expressed in DECADES (not years).
* Exposure is (census year - law year) divided by 10. It is set to 0 when the
* difference is negative or when it is missing; max() ignores a missing
* argument, so max(., 0) evaluates to 0.

* Seating law
generate seating_law_years = max(year - year_seating_law, 0) / 10

* Regulatory law (built from year_prohibitory_law)
generate regulatory_years = max(year - year_prohibitory_law, 0) / 10

* Night work law
generate nightwork_years = max(year - year_night_work, 0) / 10



/* Domestic work occupations (occ1950 codes):
   700  Housekeepers, private household
   710  Laundresses, private household
   720  Private household workers (n.e.c.)
   772  Midwives
*/

* restricted_sample: 0 for the four occupation codes listed above,
* 1 for everything else (including a missing occ1950).
generate restricted_sample = !inlist(occ1950, 700, 710, 720, 772)

* Save the main data file, overwriting any existing copy.
save "$root/Data/main_data.dta", replace

